# Replication package for 
# "The Economic Leverage of International Organizations in Interstate Disputes"
# Johannes Karreth
# June 30, 2017
# jkarreth@ursinus.edu

# This file: 11_claims_readdata.R
# Purpose: Prepare data for analysis of claims
# Execute <claims_createdata.R> to recreate claims_full.csv

# Start from a clean workspace: this removes every object in the environment
# the script is run in (normally the global environment), so anything created
# before this point is lost.
rm(list = ls())
# Set the working directory to the replication folder before running; the
# relative paths used below (e.g. "claims_full.csv", "Functions/theme_jk.R")
# are resolved against it.
# setwd("...")

library("dplyr")
library("ggplot2")
library("rio")

# Source in functions
source("Functions/theme_jk.R")

# Load and prepare data

# Read the claims data and derive 0/1 indicators from threshold comparisons.
# The `maxhost`-based indicators are created first because the comparisons
# need `maxhost` as a number; it is turned into a factor only afterwards.
dat <- import("claims_full.csv")
dat$host <- as.numeric(dat$maxhost > 0)         # maxhost above 0
dat$useforce <- as.numeric(dat$maxhost > 3)     # maxhost above 3
dat$attbilat1 <- as.numeric(dat$attbilat >= 1)  # attbilat of at least 1

dat$war <- as.numeric(dat$maxhost > 4)          # maxhost above 4
dat$maxhost <- as.factor(dat$maxhost)
dat$duration_ln <- log(dat$duration)            # natural log of duration
dat$midfat <- as.numeric(dat$midsfat > 0)       # midsfat above 0
# Each side's level-3 IGO count net of the dyad-level count
# `igo_lev3_count_use_bc`
dat$c1_igo_lev3_excl_use <- dat$c1_igo_lev3_count_use - dat$igo_lev3_count_use_bc
dat$c2_igo_lev3_excl_use <- dat$c2_igo_lev3_count_use - dat$igo_lev3_count_use_bc

# Logged absolute difference of the two CINC scores.
# NOTE(review): no offset is added here (unlike the trade logs below), so a
# dyad with identical scores gets log(0) = -Inf.
dat$cincdif <- log(abs(dat$cinc1 - dat$cinc2))

# Bilateral exports as a share of each side's total trade; the tiny constant
# in the denominator avoids division by zero.
dat$atrade <- dat$expab / (dat$tottra + 0.00000001)
dat$btrade <- dat$expba / (dat$tottrb + 0.00000001)

# Element-wise minimum/maximum of the two shares. pmin()/pmax() work on whole
# columns and return NA whenever either share is NA, as the row-wise
# min()/max() did.
dat$trademin <- pmin(dat$atrade, dat$btrade)
dat$trademax <- pmax(dat$atrade, dat$btrade)
dat$tradeimbal <- log(abs(dat$trademin - dat$trademax) + 0.0001)

# Logged dyadic trade (exports in both directions) relative to each side's
# `cgdpc*` value, offset by 0.0001 to keep the log finite at zero trade
dat$tradedep1 <- with(dat, log((expab + expba) / cgdpca + 0.0001))
dat$tradedep2 <- with(dat, log((expab + expba) / cgdpcb + 0.0001))
dat$tradedepmin <- pmin(dat$tradedep1, dat$tradedep2)
dat$tradedepmax <- pmax(dat$tradedep1, dat$tradedep2)

dat$gdppc_low_ln <- log(dat$gdppc_low)

# Sum of the `ingr2` and `ingr3` IGO counts
dat$igo_ingr23_use_bc <- dat$igo_ingr2_use_bc + dat$igo_ingr3_use_bc
dat$regionFactor <- factor(dat$region)
# 1 for years before 1991, 0 otherwise
dat$coldWar <- factor(ifelse(dat$year < 1991, 1, 0))
# `ingr2` + `ingr3` IGOs minus the level-3 count, plus the `igo_pb_use` count
dat$igo_allothers_bc <- dat$igo_ingr23_use_bc - dat$igo_lev3_count_use_bc + dat$igo_pb_use

# All analyses only post-1945: keep rows whose `yearbeg` is later than 1945

dat <- dat[dat$yearbeg > 1945, ]

# Decade index of the observation year:
#   0 = before 1950, 1 = 1950-59, 2 = 1960-69, 3 = 1970-79, 4 = 1980-89,
#   5 = 1990 and later
# findInterval() counts how many of the breakpoints are <= year, which is
# exactly that index; as.numeric() keeps the column a double.
dat$decade <- as.numeric(
  findInterval(dat$year, c(1950, 1960, 1970, 1980, 1990))
)
dat$decadeFactor <- factor(dat$decade)

## Fill in UNGA voting data from Bailey et al.
# Each fill indexes the `absidealdiff` column directly with a logical mask.
# If a dyad-year is not present in `dat`, the assignment is a no-op instead of
# stopping the script with "replacement has 1 row, data has 0".

# Russia-Kazakhstan 1991: use next available from 1992
# 365    705 1991
dat$absidealdiff[dat$dyadid == 365705 & dat$year == 1991] <- .472

# Turkmenistan-Azerbaijan 1991: use next available from 1992
# 701    373 1991
dat$absidealdiff[dat$dyadid == 373701 & dat$year == 1991] <- .029

# Turkmenistan-Iran 1991: use next available from 1992
# 701    630 1991
dat$absidealdiff[dat$dyadid == 630701 & dat$year == 1991] <- 1.232

# Turkey-Iraq 1999: use last available from 1994
# 645    640 1999
dat$absidealdiff[dat$dyadid == 640645 & dat$year == 1999] <- 2.31

## Multiple claims per dyad per year?
# Dyad-year key: the dyad id moved up four decimal places plus the year
dat$dyadid_year <- dat$dyadid * 10000 + dat$year
# Key values noted by the author (presumably the dyad-years with more than one
# claim):
# 23651965
# 203901971
# 800901981
# 800911981
# 911001982
# 912001981
# 931001979
# 1011101966 
# 1012001951 
# 1101151975 
# 1102101966
# 2652901985
# 6406521964
# 6526661953
# 6636661989

# Per dyad-year: the largest `useforce` value, the number of rows, and a
# running row index. The result stays grouped by `dyadid_year`.
dat_nomult <- dat %>%
  group_by(dyadid_year) %>%
  mutate(useforce_max = max(useforce),
         obs = n(),
         count = row_number())

# Check the first 100 cases
head(data.frame(dat_nomult)[c("dyadid", "year", "name", "useforce", "saltan", "useforce_max", "obs", "count")], 100)

# Flag rows to drop within dyad-years that have two or more rows:
#  1. rows whose `useforce` is below the dyad-year maximum;
#  2. rows with `useforce` equal to 0 that are not the first row of the
#     dyad-year (this also catches dyad-years where no row has useforce > 0).
dat_nomult$drop <- as.numeric(
  with(dat_nomult, obs >= 2 & useforce < useforce_max)
)
dat_nomult$drop <- with(
  dat_nomult,
  ifelse(obs >= 2 & useforce == 0 & count > 1, 1, drop)
)

# Same check as above, now including the drop flag
head(data.frame(dat_nomult)[c("dyadid", "year", "name", "useforce", "saltan", "useforce_max", "obs", "count", "drop")], 100)

# Distribution of HLIGOs

# The formula is used only to collect the listed variables in a model frame;
# `obs` and `count` are included so they are carried along. With R's default
# na.action, model.frame() drops rows that have a missing value in any of them.
m1_eq <- useforce ~ igo_lev3_count_use_bc + salint + saltan + terriss +
  jointpol7 + rivalry_th + absidealdiff + atopally + obs + count
m1_mf <- model.frame(m1_eq, data = dat_nomult)

# Within each value of the IGO count, rows are sorted by `obs` and numbered
# 1..n; `ypos` is the stacking height of each point in the plot.
m1_mf_sum <- mutate(arrange(group_by(m1_mf, igo_lev3_count_use_bc), obs),
                    ypos = seq_len(n()))

# One point per row, stacked by IGO count; the point shape encodes `obs`
p_claims_igo_hist <- ggplot(data = data.frame(m1_mf_sum),
                            aes(x = as.factor(igo_lev3_count_use_bc),
                                y = ypos)) +
  geom_point(aes(shape = as.factor(obs))) +
  xlab("Joint memberships in IGOs with high leverage") +
  ylab("Claim dyads") +
  theme_jk() +
  scale_shape_manual(values = c(1, 2, 0),
                     name = "Claim onsets\nwithin dyad\nwithin same year") +
  theme(legend.position = c(0.7, 0.75),
        legend.title = element_text(size = 10))

# Arguments are named in full: the earlier call passed `file =`, which only
# reached `filename` through partial argument matching.
ggsave(filename = "Output_Tables-and-Figures/claims_igo_hist.pdf",
       plot = p_claims_igo_hist, width = 3.75, height = 6)

# Drop the flagged rows only now, after the plot, which shows `obs` for all
# rows
dat_nomult <- dat_nomult[dat_nomult$drop == 0, ]

# Merge specific IGO data
igo_spec <- import("hl-igo_dyad.dta")

# Left-join the IGO-specific indicators onto a claims data set by dyad-year
# and add variants of the high-leverage IGO count that leave out selected
# organizations. Each variant is floored at zero.
#   claims: data frame with `dyadid`, `year` and `igo_lev3_count_use_bc`
#   spec:   data frame with `dyadid`, `year`, `io2400_bin`, `io2880_bin` and
#           `io2760_bin`
# Returns the merged data frame (all rows of `claims` are kept) with the
# columns hligo_noWB, hligo_noIMF, hligo_noWBIMF and hligo_noIFAD added.
merge_hligo_spec <- function(claims, spec) {
  # Negative differences are set to 0; NA stays NA
  floor_at_zero <- function(x) ifelse(x < 0, 0, x)

  out <- merge(x = claims, y = spec, by = c("dyadid", "year"),
               all.x = TRUE, all.y = FALSE)
  hligo <- out$igo_lev3_count_use_bc

  out$hligo_noWB <- floor_at_zero(hligo - out$io2400_bin)
  out$hligo_noIMF <- floor_at_zero(hligo - out$io2880_bin)
  out$hligo_noWBIMF <- floor_at_zero(hligo - out$io2880_bin - out$io2400_bin)
  out$hligo_noIFAD <- floor_at_zero(hligo - out$io2760_bin)
  out
}

# Full claims data and the version without duplicate dyad-years
dat_spec <- merge_hligo_spec(dat, igo_spec)
dat_nomult_spec <- merge_hligo_spec(dat_nomult, igo_spec)